from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
import time
from lxml import etree
import bs4
def getHtml(url):
    """Fetch *url* and return its decoded body text.

    On any request failure, returns the sentinel string "读取失败"
    ("read failed") instead of raising, matching the original contract
    that callers rely on.

    :param url: page URL to download
    :return: response text decoded with the detected apparent encoding
             (handles Chinese pages correctly), or "读取失败" on failure
    """
    try:
        # timeout added: without one, requests.get can block indefinitely
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        # use the sniffed encoding so GBK/Chinese pages decode properly
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # narrowed from bare `except:` so Ctrl-C / SystemExit still propagate;
        # keep the sentinel-string contract for existing callers
        return "读取失败"
def get_data(u):
    """Scrape one tuniu.com listing page and return a list of attraction dicts.

    :param u: URL of a listing page (expects ul.list_view > li.list_item markup)
    :return: list of dicts, one per attraction, keyed by the Chinese field names
    """
    soup = BeautifulSoup(getHtml(u), "lxml")
    items = soup.find('ul', class_='list_view').find_all('li', class_="list_item")
    records = []
    for item in items:
        # hoist the shared sub-trees once per item instead of re-finding them
        heading = item.find('h3')
        ticket = item.find('p', class_="ticket")
        records.append({
            '景点名称': heading.find('a', target="_blank").text,
            "景区所在地": heading.find('span', class_="park_city").find('a', rel="nofollow").text,
            "游客满意度(百分制)": ticket.find('strong', class_="t_ticket").text,
            "游客点评数": ticket.find('strong', class_="dp f_f60").text,
            '门票价格': item.find('div', class_="attri_price").find('span', class_='price f_yh').find('em').text,
        })
    return records
def main(pages=287, out_path=r"C:/Users/gcc/Desktop/quanguo.csv", delay=0.3):
    """Scrape all listing pages and write the collected attractions to CSV.

    The parameters default to the original hard-coded values, so calling
    ``main()`` with no arguments behaves exactly as before.

    :param pages: number of listing pages to fetch (pages 1..pages)
    :param out_path: destination CSV file path
    :param delay: seconds to sleep between page requests (politeness throttle)
    """
    data = []
    for i in range(1, pages + 1):
        try:
            url = "https://menpiao.tuniu.com/cat_0_0_0_0_0_0_1_1_" + str(i) + '.html'
            data.extend(get_data(url))
            time.sleep(delay)
        except Exception:
            # best-effort scrape: skip pages that fail to download or parse,
            # but no longer catch KeyboardInterrupt/SystemExit (was bare except)
            continue
    pd.DataFrame(data).to_csv(out_path, encoding="utf-8", index=False)
main()